* Start from an empty session so nothing left in memory interferes with the build
clear all
* Turn off output paging so the do-file runs unattended; -capture- swallows any error from the setting
capture set more off

*******************************************************************************************************************************************
// Admin
*******************************************************************************************************************************************
/*
	// Set directory path
	foreach i in "C:/Users/Andy/Dropbox/projects/WWI German Discrimination"		///
				 "C:/Users/anf137/Dropbox/projects/WWI German Discrimination"	///
				 "Z:/Dropbox/projects/WWI German Discrimination"				{
		
				global path "`i'"
				confirmdir "$path"
				if `r(confirmdir)'==0 continue, break
	}

*/

/******************************************************************************************************************************************
Merge the IPUMS sample with the crosswalks from the Census linking project for 1910 and 1920
******************************************************************************************************************************************/

	* Load only the 1910 records of the IPUMS full-count extract (men born in the
	* U.S., Germany, Switzerland, or Austria) and copy the IPUMS person identifier
	* into the variable name the crosswalk is keyed on
	use if year==1910 using "$path/Replication/raw_data/usa_00081.dta"
	generate histid_1910 = histid

	* Attach the Census linking project crosswalk, retaining matched records only
	* (keep(match) is equivalent to keeping _merge==3; nogenerate skips _merge)
	merge 1:m histid_1910 using "$path/Replication/raw_data/crosswalk_1910_1920.dta", keep(match) nogenerate

	* German sample for 1910: birthplace codes 426, 450, 453
	* (presumably the IPUMS BPL codes for Switzerland, Austria, Germany -- confirm).
	* preserve/restore leaves the full matched sample in memory for the next step
	preserve
		keep if inlist(bpl, 426, 450, 453)
		compress
		save "$path/Replication/cleaned_data/germans1910.dta", replace
	restore

	* American sample for 1910: every remaining birthplace code
	drop if inlist(bpl, 426, 450, 453)
	compress
	save "$path/Replication/cleaned_data/americans1910.dta", replace
	clear


	* Repeat the procedure for 1920: load only the 1920 records of the IPUMS
	* extract and copy the person identifier into the crosswalk's 1920 key
	use if year==1920 using "$path/Replication/raw_data/usa_00081.dta"
	generate histid_1920 = histid

	* Attach the crosswalk, retaining matched records only
	* (keep(match) is equivalent to keeping _merge==3; nogenerate skips _merge)
	merge 1:m histid_1920 using "$path/Replication/raw_data/crosswalk_1910_1920.dta", keep(match) nogenerate

	* German sample for 1920: birthplace codes 426, 450, 453
	* (presumably the IPUMS BPL codes for Switzerland, Austria, Germany -- confirm).
	* preserve/restore leaves the full matched sample in memory for the next step
	preserve
		keep if inlist(bpl, 426, 450, 453)
		compress
		save "$path/Replication/cleaned_data/germans1920.dta", replace
	restore

	* American sample for 1920: every remaining birthplace code
	drop if inlist(bpl, 426, 450, 453)
	compress
	save "$path/Replication/cleaned_data/americans1920.dta", replace
	clear
	


/******************************************************************************************************************
Append the 1910 and 1920 samples into a panel
******************************************************************************************************************/

* for Germans
	* Stack the 1910 and 1920 German samples; -clear- lets this section be re-run
	* on its own even when other data are in memory
	use "$path/Replication/cleaned_data/germans1910.dta", clear
	append using "$path/Replication/cleaned_data/germans1920.dta"

	* One id per linked (1910, 1920) identifier pair. group() leaves id missing
	* when either identifier is missing.
	egen id = group(histid_1910 histid_1920)

	* Keep complete pairs only: exactly two records, one from each census year.
	* count(id) counts non-missing ids, so records with a missing id get 0 and
	* are dropped instead of being pooled into a single by-group. The min/max
	* year check rules out two records from the same census (e.g. a duplicated
	* crosswalk row) passing as a pair.
	egen n_obs    = count(id), by(id)
	egen first_yr = min(year), by(id)
	egen last_yr  = max(year), by(id)
	keep if n_obs==2 & first_yr==1910 & last_yr==1920
	drop n_obs first_yr last_yr
	
	compress
	save "$path/Replication/cleaned_data/germans1910-20_linked.dta", replace

	* The year-specific intermediate files are no longer needed
	erase "$path/Replication/cleaned_data/germans1910.dta"
	erase "$path/Replication/cleaned_data/germans1920.dta"

* for Americans
	* Stack the 1910 and 1920 American samples; -clear- avoids the "data in memory
	* would be lost" error if the data in memory have changed since the last save
	use "$path/Replication/cleaned_data/americans1910.dta", clear
	append using "$path/Replication/cleaned_data/americans1920.dta"

	* One id per linked (1910, 1920) identifier pair. group() leaves id missing
	* when either identifier is missing.
	egen id = group(histid_1910 histid_1920)

	* Keep complete pairs only: exactly two records, one from each census year.
	* count(id) counts non-missing ids, so records with a missing id get 0 and
	* are dropped instead of being pooled into a single by-group. The min/max
	* year check rules out two records from the same census (e.g. a duplicated
	* crosswalk row) passing as a pair.
	egen n_obs    = count(id), by(id)
	egen first_yr = min(year), by(id)
	egen last_yr  = max(year), by(id)
	keep if n_obs==2 & first_yr==1910 & last_yr==1920
	drop n_obs first_yr last_yr
	
	compress
	save "$path/Replication/cleaned_data/americans1910-20_linked.dta", replace

	* The year-specific intermediate files are no longer needed
	erase "$path/Replication/cleaned_data/americans1910.dta"
	erase "$path/Replication/cleaned_data/americans1920.dta"
	clear
